# Clean the names of `x` only when at least one of them looks "bad".
#
# A name counts as bad when it contains whitespace, an upper-case letter,
# a parenthesis, a forward slash, or a hyphen. When any name matches, the
# names are rewritten with janitor::clean_names(); otherwise `x` is returned
# untouched.
#
# x: an object with names, e.g. a data frame.
# Returns `x`, with cleaned names if any name matched the pattern.
clean_if_bad_names <- function(x) {
  has_bad_names <- any(stringr::str_detect(names(x), "[\\sA-Z()/-]"))
  if (has_bad_names) {
    x <- janitor::clean_names(x)
  }
  x
}
# start again from the raw data, then pipe it through the helper
df <- penguins_raw
df %>% clean_if_bad_names()
## # A tibble: 344 × 17
## study_name sample_number species region island stage individual_id
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 PAL0708 1 Adelie Pengui… Anvers Torger… Adult, … N1A1
## 2 PAL0708 2 Adelie Pengui… Anvers Torger… Adult, … N1A2
## 3 PAL0708 3 Adelie Pengui… Anvers Torger… Adult, … N2A1
## 4 PAL0708 4 Adelie Pengui… Anvers Torger… Adult, … N2A2
## 5 PAL0708 5 Adelie Pengui… Anvers Torger… Adult, … N3A1
## 6 PAL0708 6 Adelie Pengui… Anvers Torger… Adult, … N3A2
## 7 PAL0708 7 Adelie Pengui… Anvers Torger… Adult, … N4A1
## 8 PAL0708 8 Adelie Pengui… Anvers Torger… Adult, … N4A2
## 9 PAL0708 9 Adelie Pengui… Anvers Torger… Adult, … N5A1
## 10 PAL0708 10 Adelie Pengui… Anvers Torger… Adult, … N5A2
## # … with 334 more rows, and 10 more variables: clutch_completion <chr>,
## # date_egg <date>, culmen_length_mm <dbl>, culmen_depth_mm <dbl>,
## # flipper_length_mm <dbl>, body_mass_g <dbl>, sex <chr>,
## # delta_15_n_o_oo <dbl>, delta_13_c_o_oo <dbl>, comments <chr>
clean_names() from the janitor package looks for more bad elements in names than our function does
How do we know?
😄 Pro tip: cmd+click on the function name below
# target for the cmd+click tip; no arguments are given, so this is not a
# complete call
janitor::clean_names()
# no need to test the names first; always clean them
df <- janitor::clean_names(penguins_raw)
Select the data.frame method from the drop-down
Then cmd+click the internal function make_clean_names()
Now that we can see the internals of janitor’s make_clean_names(), we see that it cleans up apostrophes and other things our function didn’t.
# shown but not run: the chunk header is {r echo=TRUE, eval=FALSE}
dplyr::arrange()
For the chunk above, the options in the gear icon do not provide a way to set:
{r echo=TRUE, eval=FALSE}
which means: show the code but do not run it.
😄 combine some functions into one function using purrr::compose()
# compose() applies its functions last-to-first by default, so `%in%` runs
# first and `!` then negates its result
not_in <- purrr::compose(`!`, `%in%`)
not_in(3, 1:2)
## [1] TRUE
not_in(1, 1:2)
## [1] FALSE
not_in("January", month.name)
## [1] FALSE
library(broom)
library(gt)
# lm_peek(): fit a linear model and preview its coefficients as a gt table.
# With .dir = "forward" the functions run in the order listed: lm() receives
# the arguments of the call, broom::tidy() turns the fit into a table of
# terms, gt::gt() renders it, and the last step adds the table title.
lm_peek <- purrr::compose(
  lm,
  broom::tidy,
  gt::gt,
  # namespace-qualified like the steps above, so it does not rely on gt
  # being attached
  ~ gt::tab_header(.x, title = "Results Preview"),
  .dir = "forward"
)
# fit, tidy, and render the model in a single call
lm_peek(
  culmen_depth_mm ~ flipper_length_mm + body_mass_g,
  data = df
)
| Results Preview | ||||
|---|---|---|---|---|
| term | estimate | std.error | statistic | p.value |
| (Intercept) | 35.7949967216 | 1.7785312629 | 20.126155 | 7.917591e-60 |
| flipper_length_mm | -0.1006442960 | 0.0125602140 | -8.012944 | 1.823056e-14 |
| body_mass_g | 0.0003753533 | 0.0002202346 | 1.704334 | 8.923502e-02 |
😄 Pro tip: alt+up or alt+down to move lines
😄 Pro tip: ctl+alt+down or ctl+alt+up to get a multi-line cursor
You try: add commas after starts_with() and mean
# exercise: the commas after starts_with("culmen") and mean are missing
df %>%
summarise(
across(
starts_with("culmen")
mean
na.rm = TRUE
)
)
😄 Pro tip: double click one of the parentheses to highlight what it contains
# Three nested div() wrappers around a summary of df: keep the rows whose
# Species contains "Chin", whose Sex is "MALE" and whose Island is "Dream",
# then take the mean of every column whose name starts with "culmen".
# NOTE(review): the filter uses the raw column names (Species, Sex, Island);
# confirm df has not been passed through clean_names() at this point.
div(
  class = "outer",
  div(
    class = "container",
    div(
      class = "data",
      df %>%
        filter(
          str_detect(Species, "Chin"),
          Sex == "MALE",
          Island == "Dream"
        ) %>%
        summarise(
          across(
            starts_with("culmen"),
            # na.rm is set inside the lambda: forwarding extra arguments
            # through across(...) was deprecated in dplyr 1.1.0
            ~ mean(.x, na.rm = TRUE)
          )
        )
    )
  )
)
Pro tip: highlight code, then in the Code menu, select Extract Function
# in the code menu, select extract function
# mean body mass over the rows where Sex is "MALE"
df %>%
  filter(Sex == "MALE") %>%
  summarise(mean_body_mass = mean(`Body Mass (g)`, na.rm = TRUE))
Comment and uncomment
😄 Pro tip: highlight multiple lines of code, shift-ctl-c to comment all